{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Differentially Private Histograms"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plotting the distribution of ages in `Adult`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from diffprivlib import tools as dp\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We first read in the list of ages in the Adult UCI dataset (the first column)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "ages_adult = np.loadtxt(\"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\",\n",
    "                        usecols=0, delimiter=\", \")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using Numpy's native `histogram` function, we can find the distribution of ages, as determined by ten equally-spaced bins calculated by `histogram`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "hist, bins = np.histogram(ages_adult)\n",
    "hist = hist / hist.sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using `matplotlib.pyplot`, we can plot a barchart of the histogram distribution."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD8CAYAAAB3u9PLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFQJJREFUeJzt3X+QXeV93/H3J5KlOG7NL20yRIKsPChxZbuRg5DJJGYaXGxRXMRMhS2GsaHDRM3ETJOmSS1PJkqj2jMw0ylNptS1EsA2sREEx/GOkas4xs4faU20YBkkiOpFqGhlEmR+OY0DWObbP+4j5/p6xZ5drXavzPs1c2bPec5zzvmevbv70XnOvUepKiRJ+qGFLkCSNBwMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJahYvdAEzsWzZshodHV3oMiTplHL//fd/o6pGput3SgXC6Ogo4+PjC12GJJ1SkvzfLv0cMpIkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBp9gnlXXqGN1yz7we7+ANl83r8aQfRF4hSJIAA0GS1BgIkiTAewg/8OZzLN9xfOnU5hWCJAkwECRJjYEgSQIMBElS403leeCNXUmnAq8QJEmAgSBJajoFQpL1SfYnmUiyZYr1FyV5IMnRJBv72n8hyZ6+6fkkV7R1H03yWN+6NXN3WpKkmZr2HkKSRcDNwCXAJLA7yVhVPdzX7XHgWuDX+7etqi8Ca9p+zgQmgD/t6/IbVXX3iZyAJGludLmpvA6YqKoDAEl2ABuA7wZCVR1s6156mf1sBD5XVd+adbWSpJOmy5DRcuBQ3/Jka5upTcAdA20fSvJgkpuSLJ1qoySbk4wnGT9y5MgsDitJ6mJebionORt4E7Crr/kDwOuBC4AzgfdPtW1Vba+qtVW1dmRk5KTXKkmvVF0C4TBwTt/yitY2E+8CPl1V3z7WUFVPVM8LwG30hqYkSQukSyDsBlYlWZlkCb2hn7EZHucqBoaL2lUDSQJcAeyd4T4lSXNo2kCoqqPA9fSGex4B7qqqfUm2JbkcIMkFSSaBK4GPJNl3bPsko/SuMP58YNefSPIQ8BCwDPjgiZ+OJGm2Oj26oqp2AjsH2rb2ze+mN5Q01bYHmeImdFVdPJNCT5T/x68kvTw/qSxJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElS0ykQkqxPsj/JRJItU6y/KMkDSY4m2Tiw7jtJ9rRprK99ZZL72j7vTLLkxE9HkjRb0wZCkkXAzcClwGrgqiSrB7o9DlwLfHKKXfx9Va1p0+V97TcCN1XVecAzwHWzqF+SNEe6XCGsAyaq6kBVvQjsADb0d6iqg1X1IPBSl4MmCXAxcHdr+hhwReeqJUlzrksgLAcO9S1PtraufjjJeJIvJzn2R/8s4NmqOjrLfUqS5tjieTjGT1TV4SSvA+5N8hDwXNeNk2wGNgOce+65J6lESVKXK4TDwDl9yytaWydVdbh9PQB8CXgz8BRwepJjgXTcfVbV9qpaW1VrR0ZGuh5WkjRDXQJhN7CqvStoCbAJGJtmGwCSnJFkaZtfBvwc8HBVFfBF4Ng7kq4BPjPT4iVJc2faQGjj/NcDu4BHgLuqal+SbUkuB0hyQZJJ4ErgI0n2tc3/CTCe5Kv0AuCGqnq4rXs/8GtJJujdU7hlLk9MkjQzne4hVNVOYOdA29a++d30hn0Gt/tfwJuOs88D9N7BJEkaAn5SWZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqSmUyAkWZ9kf5KJJFumWH9RkgeSHE2ysa99TZL/nWRfkgeTvLtv3UeTPJZkT5vWzM0pSZJmY9r/UznJIuBm4BJgEtidZKyqHu7r9jhwLfDrA5t/C3hvVX0tyY8D9yfZVVXPtvW/UVV3n+hJSJJO3LSBAKwDJqrqAECSHcAG4LuBUFUH27qX+jesqv/TN//1JE8CI8CzSJKGSpcho+XAob7lydY2I0nWAUuAR/uaP9SGkm5KsnSm+5QkzZ15uamc5GzgduBfV9Wxq4gPAK8HLgDOBN5/nG03JxlPMn7kyJH5KFeSXpG6BMJh4Jy+5RWtrZMkrwXuAX6zqr58rL2qnqieF4Db6A1NfZ+q2l5Va6tq7cjISNfDSpJmqEsg7AZWJVmZZAmwCRjrsvPW/9PAxwdvHrerBpIEuALYO5PCJUlza9pAqKqjwPXALuAR4K6q2pdkW5LLAZJckGQSuBL4SJJ9bfN3ARcB107x9tJPJHkIeAhYBnxwTs9MkjQjXd5lRFXtBHYOtG3tm99NbyhpcLs/BP7wOPu8eEaVSh2NbrlnXo938IbL5vV40sniJ5UlSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRLQMRCSrE+yP8lEki1TrL8oyQNJjibZOLDumiRfa9M1fe3nJ3mo7fP3kuTET0eSNFvTBkKSRcDNwKXAauCqJKsHuj0OXAt8cmDbM4HfBt4CrAN+O8kZbfWHgV8EVrVp/azPQpJ0wrpcIawDJqrqQFW9COwANvR3qKqDVfUg8NLAtu8APl9VT1fVM8DngfVJzgZeW1VfrqoCPg5ccaInI0mavS6BsBw41Lc82dq6ON62y9v8tPtMsjnJeJLxI0eOdDysJGmmhv6mclVtr6q1VbV2ZGRkocuRpB9YXQLhMHBO3/KK1tbF8bY93OZns09J0knQJRB2A6uSrEyyBNgEjHXc/y7g7UnOaDeT3w7sqqongG8mubC9u+i9wGdmUb8kaY5MGwhVdRS4nt4f90eAu6pqX5JtSS4HSHJBkkngSuAjSfa1bZ8G/hO9UNkNbGttAL8M/AEwATwKfG5Oz0ySNCOLu3Sqqp3AzoG2rX3zu/neIaD+frcCt07RPg68cSbFSpJOnqG/qSxJmh8GgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1nf7HNEndjG65Z96OdfCGy+btWHpl6HSFkGR9kv1JJpJsmWL90iR3tvX3JRlt7Vcn2dM3vZRkTVv3pbbPY+t+dC5PTJI0M9MGQpJFwM3ApcBq4Kokqwe6XQc8U1XnATcBNwJU1Seqak1VrQHeAzxWVXv6trv62PqqenIOzkeSNEtdrhDWARNVdaCqXgR2ABsG+mwAPtbm7wbeliQDfa5q20qShlCXQFgOHOpbnmxtU/apqqPAc8BZA33eDdwx0HZbGy76rSkCRJI0j+blXUZJ3gJ8q6r29jVfXVVvAt7apvccZ9vNScaTjB85cmQeqpWkV6YugXAYOKdveUVrm7JPksXAacBTfes3MXB1UFWH29e/BT5Jb2jq+1TV9qpaW1VrR0ZGOpQrSZqNLoGwG1iVZGWSJfT+uI8N9BkDrmnzG4F7q6oAkvwQ8C767h8kWZxkWZt/FfBOYC+SpAUz7ecQqupokuuBXcAi4Naq2pdkGzBeVWPALcDtSSaAp+mFxjEXAYeq6kBf21JgVwuDRcCfAb8/J2ckSZqVTh9Mq6qdwM6Btq19888DVx5n2y8BFw60/R1w/gxrlSSdRD66QpIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqSmUyAkWZ9kf5KJJFumWL80yZ1t/X1JRlv7aJK/T7KnTf+jb5vzkzzUtvm9JJmrk5Ikzdy0gZBkEXAzcCmwGrgqyeqBbtcBz1TVecBNwI196x6tqjVt+qW+9g8DvwisatP62Z+GJOlEdblCWAdMVNWBqnoR2AFsGOizAfhYm78beNvL/Ys/ydnAa6vqy1VVwMeBK2ZcvSRpznQJhOXAob7lydY2ZZ+qOgo8B5zV1q1M8pUkf57krX39J6fZpyRpHi0+yft/Aji3qp5Kcj7wJ0neMJMdJNkMbAY499xzT0KJkiToFgiHgXP6lle0tqn6TCZZDJwGPNWGg14AqKr7kzwK/GTrv2KafdK22w5sB1i7dm11qFd6RRrdcs+8HevgDZfN27E0f7oMGe0GViVZmWQJsAkYG+gzBlzT5jcC91ZVJRlpN6VJ8jp6N48PVNUTwDeTXNjuNbwX+MwcnI8kaZamvUKoqqNJrgd2AYuAW6tqX5JtwHhVjQG3ALcnmQCephcaABcB25J8G3gJ+KWqerqt+2Xgo8Crgc+1SZK0QDrdQ6iqncDOgbatffPPA1dOsd2ngE8dZ5/jwBtnUqwk6eTxk8qSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAnoGAhJ1ifZn2QiyZYp1i9Ncmdbf1+S0dZ+SZL7kzzUvl7ct82X2j73tOlH5+qkJEkzt3i6DkkWATcDlwCTwO4kY1X1cF+364Bnquq8JJuAG4F3A98A/mVVfT3JG4FdwPK+7a6uqvE5OhdJ0gnocoWwDpioqgNV9SKwA9gw0GcD8LE2fzfwtiSpqq9U1ddb+z7g1UmWzkXhkqS51SUQlgOH+pYn+d5/5X9Pn6o6CjwHnDXQ518BD1TVC31tt7Xhot9KkqkOnmRzkvEk40eOHOlQriRpNublpnKSN9AbRvo3fc1XV9WbgLe26T1TbVtV26tqbVWtHRkZOfnFStIrVJdAOAyc07e8orVN2SfJYuA04Km2vAL4NPDeqnr02AZVdbh9/Vvgk/SGpiRJC6RLIOwGViVZmWQJsAkYG+gzBlzT5jcC91ZVJTkduAfYUlV/caxzksVJlrX5VwHvBPae2KlIkk7EtIHQ7glcT+8dQo8Ad1XVviTbklzeut0CnJVkAvg14NhbU68HzgO2Dry9dCmwK8mDwB56Vxi/P5cnJkmamWnfdgpQVTuBnQNtW/vmnweunGK7DwIfPM5uz+9epiTpZPOTypIkwECQJDUGgiQJMBAkSY2BIEkCOr7LSJKOZ3TLPfN6vIM3XDavx3sl8QpBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGR1dIOmX52Iy55RWCJAnoGAhJ1ifZn2QiyZYp1i9Ncmdbf1+S0b51H2jt+5O8o+s+JUnza9pASLIIuBm4FFgNXJVk9UC364Bnquo84CbgxrbtamAT8AZgPfDfkyzquE9J0jzqcg9hHTBRVQcAkuwANgAP9/XZAPzHNn838N+SpLXvqKoXgMeSTLT90WGfkjS05vP+xXzdu+gyZLQcONS3PNnapuxTVUeB54CzXmbbLvuUJM2joX+XUZLNwGZgGfD/kuxf4JK6WJYb+cZCHDg3zqj7Mpi7Omd47JmYts6TeOxp9R17Tr+fMzjuTJ1wnfP0/Z6yziF5rfud9Nd9Ds75J7p06hIIh4Fz+pZXtLap+kwmWQycBjw1zbbT7ROAqtoObE8yXlWjHepdcK3WtQtdx3Ssc25Z59yyzvnXZchoN7AqycokS+jdJB4b6DMGXNPmNwL3VlW19k3tXUgrgVXAX3bcpyRpHk17hVBVR5NcD+wCFgG3VtW+JNuA8aoaA24Bbm83jZ+m9wee1u8uejeLjwLvq6rvAEy1z7k/PUlSV53uIVTVTmDnQNvWvvnngSuPs+2HgA912ec0ts+g70I7VWq1zrllnXPLOudZeiM7kqRXOh9dIUkChjQQktya5Mkke/vazkzy+SRfa1/PWMgaW03nJPlikoeT7EvyK8NYa5IfTvKXSb7a6vyd1r6yPWpkoj16ZMlC1nlM+zT7V5J8ti0PXZ1JDiZ5KMmeJOOtbahe91bT6UnuTvJXSR5J8rNDWudPte/lsembSX51SGv9d+33aG+SO9rv19D9jM7GUAYC8FF6j7rotwX4QlWtAr7QlhfaUeDfV9Vq4ELgfe0RHMNW6wvAxVX108AaYH2SC+k9YuSm9siRZ+g9gmQY/ArwSN/ysNb5C1W1pu8th8P2ugP8LvA/q+r1wE/T+74OXZ1Vtb99L9cA5wPfAj7NkNWaZDnwb4G1VfVGem+K2cTw/ozOTFUN5QSMAnv7lvcDZ7f5s4H9C13jFDV/BrhkmGsFfgR4AHgLvQ/TLG7tPwvsGoL6VtD7xb8Y+CyQIa3zILBsoG2oXnd6nwd6jHavcFjrnKLutwN/MYy18g9PWTiT3ptyPgu8Yxh/RmczDesVwlR+rKqeaPN/DfzYQhYzqD3h9c3AfQxhrW0YZg/wJPB54FHg2eo9agSG5/Eh/xX4D8BLbfkshrPOAv40yf3t0/QwfK/7SuAIcFsbgvuDJK9h+OoctAm4o80PVa1VdRj4z8DjwBP0HtNzP8P5Mzpjp1IgfFf1Ynho3h6V5B8BnwJ+taq+2b9uWGqtqu9U73J8Bb0HDL5+gUv6PkneCTxZVfcvdC0d/HxV/Qy9J/a+L8lF/SuH5HVfDPwM8OGqejPwdwwMuQxJnd/Vxt4vB/5ocN0w1NruYWygF7Y/DryG7x/ePmWdSoHwN0nOBmhfn1zgegBI8ip6YfCJqvrj1jyUtQJU1bPAF+ld1p7eHjUCL/P4kHn0c8DlSQ4CO+gNG/0uw1fnsX8pUlVP0hvrXsfwve6TwGRV3deW76YXEMNWZ79LgQeq6m/a8rDV+s+Bx6rqSFV9G/hjej+3Q/czOhunUiD0Px7jGnrj9QsqSeh9SvuRqvovfauGqtYkI0lOb/Ovpnef4xF6wbCxdVvwOqvqA1W1onrPrNpE7xEoVzNkdSZ5TZJ/fGye3pj3Xobsda+qvwYOJfmp1vQ2ek8NGKo6B1zFPwwXwfDV+jhwYZIfab//x76nQ/UzOmsLfRPjODdu7qA3Pvdtev/KuY7eWPIXgK8BfwacOQR1/jy9S9gHgT1t+hfDVivwT4GvtDr3Altb++voPVtqgt4l+tKF/p721fzPgM8OY52tnq+2aR/wm619qF73VtMaYLy99n8CnDGMdbZaX0PvoZin9bUNXa3A7wB/1X6XbgeWDtvP6GwnP6ksSQJOrSEjSdJJZCBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAuD/A0vNJacmm88cAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.bar(bins[:-1], hist, width=(bins[1]-bins[0]) * 0.9)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Differentially private histograms"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using `diffprivlib`, we can calculate a differentially private version of the histogram. For this example, we use the default settings:\n",
    "- `epsilon` is 1.0\n",
    "- `range` is not specified, so is calculated by the function on-the-fly. This throws a warning, as it leaks privacy about the data (from `dp_bins`, we know that there are people in the dataset aged 17 and 90)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      ".../site-packages/diffprivlib/tools/histograms.py:131: PrivacyLeakWarning: Range parameter has not been specified. Falling back to taking range from the data.\n",
      "To ensure differential privacy, and no additional privacy leakage, the range must be specified independently of the data (i.e., using domain knowledge).\n",
      "  \"specified independently of the data (i.e., using domain knowledge).\", PrivacyLeakWarning)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD8CAYAAAB3u9PLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFQJJREFUeJzt3X+QXeV93/H3J5KlOG7NL20yRIKsPChxZbuRg5DJJGYaXGxRXMRMhS2GsaHDRM3ETJOmSS1PJkqj2jMw0ylNptS1EsA2sREEx/GOkas4xs4faU20YBkkiOpFqGhlEmR+OY0DWObbP+4j5/p6xZ5drXavzPs1c2bPec5zzvmevbv70XnOvUepKiRJ+qGFLkCSNBwMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJahYvdAEzsWzZshodHV3oMiTplHL//fd/o6pGput3SgXC6Ogo4+PjC12GJJ1SkvzfLv0cMpIkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBp9gnlXXqGN1yz7we7+ANl83r8aQfRF4hSJIAA0GS1BgIkiTAewg/8OZzLN9xfOnU5hWCJAkwECRJjYEgSQIMBElS403leeCNXUmnAq8QJEmAgSBJajoFQpL1SfYnmUiyZYr1FyV5IMnRJBv72n8hyZ6+6fkkV7R1H03yWN+6NXN3WpKkmZr2HkKSRcDNwCXAJLA7yVhVPdzX7XHgWuDX+7etqi8Ca9p+zgQmgD/t6/IbVXX3iZyAJGludLmpvA6YqKoDAEl2ABuA7wZCVR1s6156mf1sBD5XVd+adbWSpJOmy5DRcuBQ3/Jka5upTcAdA20fSvJgkpuSLJ1qoySbk4wnGT9y5MgsDitJ6mJebionORt4E7Crr/kDwOuBC4AzgfdPtW1Vba+qtVW1dmRk5KTXKkmvVF0C4TBwTt/yitY2E+8CPl1V3z7WUFVPVM8LwG30hqYkSQukSyDsBlYlWZlkCb2hn7EZHucqBoaL2lUDSQJcAeyd4T4lSXNo2kCoqqPA9fSGex4B7qqqfUm2JbkcIMkFSSaBK4GPJNl3bPsko/SuMP58YNefSPIQ8BCwDPjgiZ+OJGm2Oj26oqp2AjsH2rb2ze+mN5Q01bYHmeImdFVdPJNCT5T/x68kvTw/qSxJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElS0ykQkqxPsj/JRJItU6y/KMkDSY4m2Tiw7jtJ9rRprK99ZZL72j7vTLLkxE9HkjRb0wZCkkXAzcClwGrgqiSrB7o9DlwLfHKKXfx9Va1p0+V97TcCN1XVecAzwHWzqF+SNEe6XCGsAyaq6kBVvQjsADb0d6iqg1X1IPBSl4MmCXAxcHdr+hhwReeqJUlzrksgLAcO9S1PtraufjjJeJIvJzn2R/8s4NmqOjrLfUqS5tjieTjGT1TV4SSvA+5N8hDwXNeNk2wGNgOce+65J6lESVKXK4TDwDl9yytaWydVdbh9PQB8CXgz8BRwepJjgXTcfVbV9qpaW1VrR0ZGuh5WkjRDXQJhN7CqvStoCbAJGJtmGwCSnJFkaZtfBvwc8HBVFfBF4Ng7kq4BPjPT4iVJc2faQGjj/NcDu4BHgLuqal+SbUkuB0hyQZJJ4ErgI0n2tc3/CTCe5Kv0AuCGqnq4rXs/8GtJJujdU7hlLk9MkjQzne4hVNVOYOdA29a++d30hn0Gt/tfwJuOs88D9N7BJEkaAn5SWZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqSmUyAkWZ9kf5KJJFumWH9RkgeSHE2ysa99TZL/nWRfkgeTvLtv3UeTPJZkT5vWzM0pSZJmY9r/UznJIuBm4BJgEtidZKyqHu7r9jhwLfDrA5t/C3hvVX0tyY8D9yfZVVXPtvW/UVV3n+hJSJJO3LSBAKwDJqrqAECSHcAG4LuBUFUH27qX+jesqv/TN//1JE8CI8CzSJKGSpcho+XAob7lydY2I0nWAUuAR/uaP9SGkm5KsnSm+5QkzZ15uamc5GzgduBfV9Wxq4gPAK8HLgDOBN5/nG03JxlPMn7kyJH5KFeSXpG6BMJh4Jy+5RWtrZMkrwXuAX6zqr58rL2qnqieF4Db6A1NfZ+q2l5Va6tq7cjISNfDSpJmqEsg7AZWJVmZZAmwCRjrsvPW/9PAxwdvHrerBpIEuALYO5PCJUlza9pAqKqjwPXALuAR4K6q2pdkW5LLAZJckGQSuBL4SJJ9bfN3ARcB107x9tJPJHkIeAhYBnxwTs9MkjQjXd5lRFXtBHYOtG3tm99NbyhpcLs/BP7wOPu8eEaVSh2NbrlnXo938IbL5vV40sniJ5UlSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRLQMRCSrE+yP8lEki1TrL8oyQNJjibZOLDumiRfa9M1fe3nJ3mo7fP3kuTET0eSNFvTBkKSRcDNwKXAauCqJKsHuj0OXAt8cmDbM4HfBt4CrAN+O8kZbfWHgV8EVrVp/azPQpJ0wrpcIawDJqrqQFW9COwANvR3qKqDVfUg8NLAtu8APl9VT1fVM8DngfVJzgZeW1VfrqoCPg5ccaInI0mavS6BsBw41Lc82dq6ON62y9v8tPtMsjnJeJLxI0eOdDysJGmmhv6mclVtr6q1VbV2ZGRkocuRpB9YXQLhMHBO3/KK1tbF8bY93OZns09J0knQJRB2A6uSrEyyBNgEjHXc/y7g7UnOaDeT3w7sqqongG8mubC9u+i9wGdmUb8kaY5MGwhVdRS4nt4f90eAu6pqX5JtSS4HSHJBkkngSuAjSfa1bZ8G/hO9UNkNbGttAL8M/AEwATwKfG5Oz0ySNCOLu3Sqqp3AzoG2rX3zu/neIaD+frcCt07RPg68cSbFSpJOnqG/qSxJmh8GgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1nf7HNEndjG65Z96OdfCGy+btWHpl6HSFkGR9kv1JJpJsmWL90iR3tvX3JRlt7Vcn2dM3vZRkTVv3pbbPY+t+dC5PTJI0M9MGQpJFwM3ApcBq4Kokqwe6XQc8U1XnATcBNwJU1Seqak1VrQHeAzxWVXv6trv62PqqenIOzkeSNEtdrhDWARNVdaCqXgR2ABsG+mwAPtbm7wbeliQDfa5q20qShlCXQFgOHOpbnmxtU/apqqPAc8BZA33eDdwx0HZbGy76rSkCRJI0j+blXUZJ3gJ8q6r29jVfXVVvAt7apvccZ9vNScaTjB85cmQeqpWkV6YugXAYOKdveUVrm7JPksXAacBTfes3MXB1UFWH29e/BT5Jb2jq+1TV9qpaW1VrR0ZGOpQrSZqNLoGwG1iVZGWSJfT+uI8N9BkDrmnzG4F7q6oAkvwQ8C767h8kWZxkWZt/FfBOYC+SpAUz7ecQqupokuuBXcAi4Naq2pdkGzBeVWPALcDtSSaAp+mFxjEXAYeq6kBf21JgVwuDRcCfAb8/J2ckSZqVTh9Mq6qdwM6Btq19888DVx5n2y8BFw60/R1w/gxrlSSdRD66QpIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqSmUyAkWZ9kf5KJJFumWL80yZ1t/X1JRlv7aJK/T7KnTf+jb5vzkzzUtvm9JJmrk5Ikzdy0gZBkEXAzcCmwGrgqyeqBbtcBz1TVecBNwI196x6tqjVt+qW+9g8DvwisatP62Z+GJOlEdblCWAdMVNWBqnoR2AFsGOizAfhYm78beNvL/Ys/ydnAa6vqy1VVwMeBK2ZcvSRpznQJhOXAob7lydY2ZZ+qOgo8B5zV1q1M8pUkf57krX39J6fZpyRpHi0+yft/Aji3qp5Kcj7wJ0neMJMdJNkMbAY499xzT0KJkiToFgiHgXP6lle0tqn6TCZZDJwGPNWGg14AqKr7kzwK/GTrv2KafdK22w5sB1i7dm11qFd6RRrdcs+8HevgDZfN27E0f7oMGe0GViVZmWQJsAkYG+gzBlzT5jcC91ZVJRlpN6VJ8jp6N48PVNUTwDeTXNjuNbwX+MwcnI8kaZamvUKoqqNJrgd2AYuAW6tqX5JtwHhVjQG3ALcnmQCephcaABcB25J8G3gJ+KWqerqt+2Xgo8Crgc+1SZK0QDrdQ6iqncDOgbatffPPA1dOsd2ngE8dZ5/jwBtnUqwk6eTxk8qSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAnoGAhJ1ifZn2QiyZYp1i9Ncmdbf1+S0dZ+SZL7kzzUvl7ct82X2j73tOlH5+qkJEkzt3i6DkkWATcDlwCTwO4kY1X1cF+364Bnquq8JJuAG4F3A98A/mVVfT3JG4FdwPK+7a6uqvE5OhdJ0gnocoWwDpioqgNV9SKwA9gw0GcD8LE2fzfwtiSpqq9U1ddb+z7g1UmWzkXhkqS51SUQlgOH+pYn+d5/5X9Pn6o6CjwHnDXQ518BD1TVC31tt7Xhot9KkqkOnmRzkvEk40eOHOlQriRpNublpnKSN9AbRvo3fc1XV9WbgLe26T1TbVtV26tqbVWtHRkZOfnFStIrVJdAOAyc07e8orVN2SfJYuA04Km2vAL4NPDeqnr02AZVdbh9/Vvgk/SGpiRJC6RLIOwGViVZmWQJsAkYG+gzBlzT5jcC91ZVJTkduAfYUlV/caxzksVJlrX5VwHvBPae2KlIkk7EtIHQ7glcT+8dQo8Ad1XVviTbklzeut0CnJVkAvg14NhbU68HzgO2Dry9dCmwK8mDwB56Vxi/P5cnJkmamWnfdgpQVTuBnQNtW/vmnweunGK7DwIfPM5uz+9epiTpZPOTypIkwECQJDUGgiQJMBAkSY2BIEkCOr7LSJKOZ3TLPfN6vIM3XDavx3sl8QpBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGR1dIOmX52Iy55RWCJAnoGAhJ1ifZn2QiyZYp1i9Ncmdbf1+S0b51H2jt+5O8o+s+JUnza9pASLIIuBm4FFgNXJVk9UC364Bnquo84CbgxrbtamAT8AZgPfDfkyzquE9J0jzqcg9hHTBRVQcAkuwANgAP9/XZAPzHNn838N+SpLXvqKoXgMeSTLT90WGfkjS05vP+xXzdu+gyZLQcONS3PNnapuxTVUeB54CzXmbbLvuUJM2joX+XUZLNwGZgGfD/kuxf4JK6WJYb+cZCHDg3zqj7Mpi7Omd47JmYts6TeOxp9R17Tr+fMzjuTJ1wnfP0/Z6yziF5rfud9Nd9Ds75J7p06hIIh4Fz+pZXtLap+kwmWQycBjw1zbbT7ROAqtoObE8yXlWjHepdcK3WtQtdx3Ssc25Z59yyzvnXZchoN7AqycokS+jdJB4b6DMGXNPmNwL3VlW19k3tXUgrgVXAX3bcpyRpHk17hVBVR5NcD+wCFgG3VtW+JNuA8aoaA24Bbm83jZ+m9wee1u8uejeLjwLvq6rvAEy1z7k/PUlSV53uIVTVTmDnQNvWvvnngSuPs+2HgA912ec0ts+g70I7VWq1zrllnXPLOudZeiM7kqRXOh9dIUkChjQQktya5Mkke/vazkzy+SRfa1/PWMgaW03nJPlikoeT7EvyK8NYa5IfTvKXSb7a6vyd1r6yPWpkoj16ZMlC1nlM+zT7V5J8ti0PXZ1JDiZ5KMmeJOOtbahe91bT6UnuTvJXSR5J8rNDWudPte/lsembSX51SGv9d+33aG+SO9rv19D9jM7GUAYC8FF6j7rotwX4QlWtAr7QlhfaUeDfV9Vq4ELgfe0RHMNW6wvAxVX108AaYH2SC+k9YuSm9siRZ+g9gmQY/ArwSN/ysNb5C1W1pu8th8P2ugP8LvA/q+r1wE/T+74OXZ1Vtb99L9cA5wPfAj7NkNWaZDnwb4G1VfVGem+K2cTw/ozOTFUN5QSMAnv7lvcDZ7f5s4H9C13jFDV/BrhkmGsFfgR4AHgLvQ/TLG7tPwvsGoL6VtD7xb8Y+CyQIa3zILBsoG2oXnd6nwd6jHavcFjrnKLutwN/MYy18g9PWTiT3ptyPgu8Yxh/RmczDesVwlR+rKqeaPN/DfzYQhYzqD3h9c3AfQxhrW0YZg/wJPB54FHg2eo9agSG5/Eh/xX4D8BLbfkshrPOAv40yf3t0/QwfK/7SuAIcFsbgvuDJK9h+OoctAm4o80PVa1VdRj4z8DjwBP0HtNzP8P5Mzpjp1IgfFf1Ynho3h6V5B8BnwJ+taq+2b9uWGqtqu9U73J8Bb0HDL5+gUv6PkneCTxZVfcvdC0d/HxV/Qy9J/a+L8lF/SuH5HVfDPwM8OGqejPwdwwMuQxJnd/Vxt4vB/5ocN0w1NruYWygF7Y/DryG7x/ePmWdSoHwN0nOBmhfn1zgegBI8ip6YfCJqvrj1jyUtQJU1bPAF+ld1p7eHjUCL/P4kHn0c8DlSQ4CO+gNG/0uw1fnsX8pUlVP0hvrXsfwve6TwGRV3deW76YXEMNWZ79LgQeq6m/a8rDV+s+Bx6rqSFV9G/hjej+3Q/czOhunUiD0Px7jGnrj9QsqSeh9SvuRqvovfauGqtYkI0lOb/Ovpnef4xF6wbCxdVvwOqvqA1W1onrPrNpE7xEoVzNkdSZ5TZJ/fGye3pj3Xobsda+qvwYOJfmp1vQ2ek8NGKo6B1zFPwwXwfDV+jhwYZIfab//x76nQ/UzOmsLfRPjODdu7qA3Pvdtev/KuY7eWPIXgK8BfwacOQR1/jy9S9gHgT1t+hfDVivwT4GvtDr3Altb++voPVtqgt4l+tKF/p721fzPgM8OY52tnq+2aR/wm619qF73VtMaYLy99n8CnDGMdbZaX0PvoZin9bUNXa3A7wB/1X6XbgeWDtvP6GwnP6ksSQJOrSEjSdJJZCBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAuD/A0vNJacmm88cAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "dp_hist, dp_bins = dp.histogram(ages_adult)\n",
    "dp_hist = dp_hist / dp_hist.sum()\n",
    "\n",
    "plt.bar(dp_bins[:-1], dp_hist, width=(dp_bins[1] - dp_bins[0]) * 0.9)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Privacy Leak:** In this setting, we know for sure that at least one person in the dataset is aged 17, and another is aged 90."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(17.0, 90.0)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dp_bins[0], dp_bins[-1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Mirroring the behaviour of `np.histogram`:** We can see that the bins returned by `diffprivlib.tools.histogram` are identical to those given by `numpy.histogram`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.all(dp_bins == bins)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Error:** We can see very little difference in the values of the histgram. In fact, we see an aggregate absolute error across all bins of the order of 0.01%. This is expected, due to the large size of the dataset (`n=48842`). "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total histogram error: 0.000380\n"
     ]
    }
   ],
   "source": [
    "print(\"Total histogram error: %f\" % np.abs(hist - dp_hist).sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Effect of `epsilon`:** If we decrease `epsilon` (i.e. **increase** the privacy guarantee), the error will increase."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total histogram error: 0.242242\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      ".../site-packages/diffprivlib/tools/histograms.py:131: PrivacyLeakWarning: Range parameter has not been specified. Falling back to taking range from the data.\n",
      "To ensure differential privacy, and no additional privacy leakage, the range must be specified independently of the data (i.e., using domain knowledge).\n",
      "  \"specified independently of the data (i.e., using domain knowledge).\", PrivacyLeakWarning)\n"
     ]
    }
   ],
   "source": [
    "dp_hist, dp_bins = dp.histogram(ages_adult, epsilon=0.001)\n",
    "dp_hist = dp_hist / dp_hist.sum()\n",
    "\n",
    "print(\"Total histogram error: %f\" % np.abs(hist - dp_hist).sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Deciding on the `range` parameter"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We know from the [dataset description](https://archive.ics.uci.edu/ml/datasets/adult) that everyone in the dataset is at least 17 years of age. We don't know off-hand what the upper bound is, so for this example we'll set the upper bound to `100`. As of 2019, less than 0.005% of the world's population is [aged over 100](https://en.wikipedia.org/wiki/Centenarian), so this is an appropriate simplification. Values in the dataset above 100 will be excluded from calculations."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "An `epsilon` of 0.1 still preserves the broad structure of the histogram."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD8CAYAAAB3u9PLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFPhJREFUeJzt3X2QXfV93/H3J5JFnGRs87DJED1Y8qDElXErl0Wmk5q2+EkMLmKmYIshRmSo1YyjaVrXqeVmglvFmYG2UxpPqYtiwODYFhTHZaeIqtTgTKctVMtDEYKqXoSCViZF5sluHIMVvv3j/uRcrlfs2dXVrhTer5k7e87v4ZzfuXN3P3se7jmpKiRJ+on5HoAk6fhgIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUrNwvgcwE6eddlotX758vochSSeUBx544DtVNTJduxMqEJYvX874+Ph8D0OSTihJ/qhLOw8ZSZIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkoAT7JvKJ6rlm++cs3Xtu/qC42bdkk4s7iFIkgADQZLUdAqEJGuT7EkykWTzFPWfSPJYkkeSfCPJW/vqNiT5Vntt6Cs/K8mutszPJclwNkmSNBvTBkKSBcB1wPnAKuDSJKsGmj0EjFbVXwZuB/5563sK8Bng3cAa4DNJTm59Pg98DFjZXmuPemskSbPWZQ9hDTBRVXur6mVgG7Cuv0FV3VtV32+z9wFL2vQHgbur6rmqeh64G1ib5HTgTVV1X1UVcAtw0RC2R5I0S10CYTGwv29+spUdyZXAXdP0Xdymuy5TknSMDfWy0yS/DIwCf2OIy9wIbARYtmzZsBYrSRrQZQ/hALC0b35JK3uVJO8DfhO4sKpemqbvAf78sNIRlwlQVVurarSqRkdGpn0CnCRplroEwk5gZZIVSRYB64Gx/gZJ3gVcTy8Mnumr2gF8IMnJ7WTyB4AdVfU08N0k57Sriy4H7hjC9kiSZmnaQ0ZVdSjJJnp/3BcAN1bV7iRbgPGqGgP+BfAzwL9vV48+VVUXVtVzSX6bXqgAbKmq59r0x4EvAm+kd87hLo6hufzGLvitXUknnk7nEKpqO7B9oOyqvun3vUbfG4EbpygfB87sPFJJ0jHlN5UlSYCBIElqDARJEmAgSJIaA0GSBPiAHB0jXuYrnXjcQ5AkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqekUCEnWJtmTZCLJ5inqz03yYJJDSS7uK/9bSR7ue/0gyUWt7otJnuyrWz28zZIkzdS09zJKsgC4Dng/MAnsTDJWVY/1NXsKuAL4ZH/fqroXWN2WcwowAfznvia/UVW3H80GSJKGo8vN7dYAE1W1FyDJNmAd8KNAqKp9re6V11jOxcBdVfX9WY9WknTMdDlktBjY3zc/2cpmaj3w1YGy30nySJJrk5w0i2VKkoZkTk4qJzkdeCewo6/408DbgbOBU4BPHaHvxiTjScYPHjx4zMcqSa9XXQLhALC0b35JK5uJDwNfr6ofHi6oqqer5yXgJnqHpn5MVW2tqtGqGh0ZGZnhaiVJXXUJhJ3AyiQrkiyid+hnbIbruZSBw0Vtr4EkAS4CHp3hMiVJQzRtIFTVIWATvcM9jwO3VdXuJFuSXAiQ5Owkk8AlwPVJdh/un2Q5vT2MPxxY9JeT7AJ2AacBnz36zZEkzVanR2hW1XZg+0DZVX3TO+kdSpqq7z6mOAldVefNZKCSpGPLbypLkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUtMpEJKsTbInyUSSzVPUn5vkwSSHklw8UPdnSR5ur7G+8hVJ7m/LvLU9nlOSNE+mDYQkC4DrgPOBVcClSVYNNHsKuAL4yhSL+NOqWt1eF/aVXwNcW1VnAM8DV85i/JKkIemyh7AGmKiqvVX1MrANWNffoKr2VdUjwCtdVpokwHnA7a3oZuCizqOWJA1dl0BYDOzvm59kimckv4afTDKe5L4kh//onwq8UFWHZrlMSdKQLZyDdby1qg4keRtwT5JdwItdOyfZCGwEWLZs2TEaoiSpyx7CAWBp3/ySVtZJVR1oP/cC3wTeBTwLvCXJ4UA64jKramtVjVbV6MjISNfVSpJmqEsg7ARWtquCFgHrgbFp+gCQ5OQkJ7Xp04BfAh6rqgLuBQ5fkbQBuGOmg5ckDc+0gdCO828CdgCPA7dV1e4kW5JcCJDk7CSTwCXA9Ul2t+5/CRhP8r/oBcDVVfVYq/sU8IkkE/TOKdwwzA2TJM1Mp3MIVbUd2D5QdlXf9E56h30G+/134J1HWOZeelcwSZKOA35TWZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJKaToGQZG2SPUkmkmyeov7cJA8mOZTk4r7y1Un+R5LdSR5J8pG+ui8meTLJw+21ejibJEmajWmfmJZkAXAd8H5gEtiZZKzvUZgATwFXAJ8c6P594PKq+laSnwceSLKjql5o9b9RVbcf7UZIko5el0dorgEm2iMvSbINWAf8KBCqal+re6W/Y1X9n77pbyd5BhgBXkCSdFzpcshoMbC/b36ylc1IkjXAIuCJvuLfaYeSrk1y0kyXKUkanjk5qZzkdOBLwK9U1eG9iE8DbwfOBk4BPnWEvhuTjCcZP3jw4FwMV5Jel7oEwgFgad/8klbWSZI3AXcCv1lV9x0ur6qnq+cl4CZ6h6Z+TFVtrarRqhodGRnpulpJ0gx1CYSdwMokK5IsAtYDY10W3tp/Hbhl8ORx22sgSYCLgEdnMnBJ0nBNGwhVdQjYBOwAHgduq6rdSbYkuRAgydlJJoFLgOuT7G7dPwycC1wxxeWlX06yC9gFnAZ8dqhbJkmakS5XGVFV24HtA2VX9U3vpHcoabDf7wO/f4RlnjejkUqSjim/qSxJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNZ1ufy2dSJZvvnNO17fv6gvmdH3SseIegiQJ6BgISdYm2ZNkIsnmKerPTfJgkkNJLh6o25DkW+21oa/8rCS72jI/1x6lKUmaJ9MGQpIFwHXA+cAq4NIkqwaaPQVcAXxloO8pwGeAdwNrgM8kOblVfx74GLCyvdbOeiskSUetyx7CGmCiqvZW1cvANmBdf4Oq2ldVjwCvDPT9IHB3VT1XVc8DdwNrk5wOvKmq7quqAm4BLjrajZEkzV6XQFgM7O+bn2xlXRyp7+I2PZtlSpKOgeP+pHKSjUnGk4wfPHhwvocjSX9hdQmEA8DSvvklrayLI/U90KanXWZVba2q0aoaHRkZ6bhaSdJMdQmEncDKJCuSLALWA2Mdl78D+ECSk9vJ5A8AO6rqaeC7Sc5pVxddDtwxi/FLkoZk2kCoqkPAJnp/3B8Hbquq3Um2JLkQIMnZSSaBS4Drk+xufZ8DfpteqOwEtrQygI8DXwAmgCeAu4a6ZZKkGen0TeWq2g5sHyi7qm96J68+BNTf7kbgxinKx4EzZzJYSdKxc9yfVJYkzQ0DQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEtAxEJKsTbInyUSSzVPUn5Tk1lZ/f5LlrfyyJA/3vV5JsrrVfbMt83Ddzw5zwyRJMzNtICRZAFwHnA+sAi5Nsmqg2ZXA81V1BnAtcA1AVX25qlZX1Wrgo8CTVfVwX7/LDtdX1TND2B5J0ix12UNYA0xU1d6qehnYBqwbaLMOuLlN3w68N0kG2lza+kqSjkNdAmExsL9vfrKVTdmmqg4BLwKnDrT5CPDVgbKb2uGi35oiQCRJc2hOTioneTfw/ap6tK/4sqp6J/Ce9vroEfpuTDKeZPzgwYNzMFpJen3qEggHgKV980ta2ZRtkiwE3gw821e/noG9g6o60H5+D/gKvUNTP6aqtlbVaFWNjoyMdBiuJGk2ugTCTmBlkhVJFtH74z420GYM2NCmLwbuqaoCSPITwIfpO3+QZGGS09r0G4APAY8iSZo3C6drUFWHkmwCdgALgBuraneSLcB4VY0BNwBfSjIBPEcvNA47F9hfVXv7yk4CdrQwWAD8F+D3hrJFkqRZmTYQAKpqO7B9oOyqvukfAJccoe83gXMGyv4EOGuGY5UkHUN+U1mSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEdPymsqRulm++c87Wte/qC+ZsXXp9cA9BkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqOgVCkrVJ9iSZSLJ5ivqTktza6u9PsryVL0/yp0kebq9/19fnrCS7Wp/PJcmwNkqSNHPTBkKSBcB1wPnAKuDSJKsGml0JPF9VZwDXAtf01T1RVavb61f7yj8PfAxY2V5rZ78ZkqSj1WUPYQ0wUVV7q+plYBuwbqDNOuDmNn078N7X+o8/yenAm6rqvqoq4BbgohmPXpI0NF0CYTGwv29+spVN2aaqDgEvAqe2uhVJHkryh0ne09d+cpplSpLm0LG+dcXTwLKqejbJWcB/SPKOmSwgyUZgI8CyZcuOwRAlSdBtD+EAsLRvfkkrm7JNkoXAm4Fnq+qlqnoWoKoeAJ4AfqG1XzLNMmn9tlbVaFWNjoyMdBiuJGk2ugTCTmBlkhVJFgHrgbGBNmPAhjZ9MXBPVVWSkXZSmiRvo3fyeG9VPQ18N8k57VzD5cAdQ9geSdIsTXvIqKoOJdkE7AAWADdW1e4kW4DxqhoDbgC+lGQCeI5eaACcC2xJ8kPgFeBXq+q5Vvdx4IvAG4G72kuSNE86nUOoqu3A9oGyq/qmfwBcMkW/rwFfO8Iyx4EzZzJYSdKx4zeVJUmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKnpFAhJ1ibZk2QiyeYp6k9Kcmurvz/J8lb+/iQPJNnVfp7X1+ebbZkPt9fPDmujJEkzN+0T09ozka8D3g9MAjuTjFXVY33NrgSer6ozkqwHrgE+AnwH+NtV9e0kZ9J7DOfivn6XtSenSZLmWZc9hDXARFXtraqXgW3AuoE264Cb2/TtwHuTpKoeqqpvt/LdwBuTnDSMgUuShqtLICwG9vfNT/Lq//Jf1aaqDgEvAqcOtPk7wINV9VJf2U3tcNFvJcmMRi5JGqo5Oamc5B30DiP9vb7iy6rqncB72uujR+i7Mcl4kvGDBw8e+8FK0uvUtOcQgAPA0r75Ja1sqjaTSRYCbwaeBUiyBPg6cHlVPXG4Q1UdaD+/l+Qr9A5N3TK48qraCmwFGB0drW6bJb3+LN9855yta9/VF8zZujR3uuwh7ARWJlmRZBGwHhgbaDMGbGjTFwP3VFUleQtwJ7C5qv7b4cZJFiY5rU2/AfgQ8OjRbYok6WhMGwjtnMAmelcIPQ7cVlW7k2xJcmFrdgNwapIJ4BPA4UtTNwFnAFcNXF56ErAjySPAw/T2MH5vmBsmSZqZLoeMqKrtwPaBsqv6pn8AXDJFv88Cnz3CYs/qPkxJ0rHmN5UlSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpKbT7a8l6Ujm8klt4NPajiX3ECRJQMdASLI2yZ4kE0k2T1F/UpJbW/39SZb31X26le9J8sGuy5Qkza1pAyHJAuA64HxgFXBpklUDza4Enq+qM4BrgWta31X0nsH8DmAt8G+TLOi4TEnSHOpyDmENMFFVewGSbAPWAY/1tVkH/NM2fTvwb5KklW+rqpeAJ9szl9e0dtMtU5Jek+cvhqvLIaPFwP6++clWNmWbqjoEvAic+hp9uyxTkjSHjvurjJJsBDa22f+XZM98jmcapwHfAcg18zOA+VrvDNf9o/dpHtY9dMdw3a/5Pv0F3ebZrHvon6cZrPtE8dYujboEwgFgad/8klY2VZvJJAuBNwPPTtN3umUCUFVbga0dxjnvkoxX1eh8j+N45/vUje9TN75Pw9PlkNFOYGWSFUkW0TtJPDbQZgzY0KYvBu6pqmrl69tVSCuAlcD/7LhMSdIcmnYPoaoOJdkE7AAWADdW1e4kW4DxqhoDbgC+1E4aP0fvDzyt3W30ThYfAn6tqv4MYKplDn/zJEldpfePvIYhycZ2iEuvwfepG9+nbnyfhsdAkCQB3rpCktQYCLOQZGmSe5M8lmR3kl9v5ackuTvJt9rPk+d7rMeD9u30h5L8xza/ot3iZKLd8mTRfI/xeJDkLUluT/K/kzye5K/5mfpxSf5h+717NMlXk/ykn6nhMBBm5xDwj6pqFXAO8Gvt1hubgW9U1UrgG21e8OvA433z1wDXtludPE/v1ieC3wX+U1W9Hfgr9N4zP1N9kiwG/j4wWlVn0rsoZT1+pobCQJiFqnq6qh5s09+j94u7mN7tN25uzW4GLpqfER4/kiwBLgC+0OYDnEfvFifg+wRAkjcD59K7Yo+qermqXsDP1FQWAm9s33n6KeBp/EwNhYFwlNqdXd8F3A/8XFU93ar+GPi5eRrW8eRfA/8YeKXNnwq80G5xAt625LAVwEHgpnZ47QtJfho/U69SVQeAfwk8RS8IXgQewM/UUBgIRyHJzwBfA/5BVX23v659Me91fQlXkg8Bz1TVA/M9lhPAQuCvAp+vqncBf8LA4SE/U9DOoayjF6A/D/w0vTspawgMhFlK8gZ6YfDlqvqDVvx/k5ze6k8Hnpmv8R0nfgm4MMk+YBu93frfBd7SdvfhNW5b8jozCUxW1f1t/nZ6AeFn6tXeBzxZVQer6ofAH9D7nPmZGgIDYRbacfAbgMer6l/1VfXfwmMDcMdcj+14UlWfrqolVbWc3om/e6rqMuBeerc4Ad8nAKrqj4H9SX6xFb2X3jf8/Uy92lPAOUl+qv0eHn6f/EwNgV9Mm4Ukfx34r8Au/vzY+D+hdx7hNmAZ8EfAh6vquXkZ5HEmyd8EPllVH0ryNnp7DKcADwG/3J6Z8bqWZDW9k++LgL3Ar9D7p83PVJ8k/wz4CL2r/R4C/i69cwZ+po6SgSBJAjxkJElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJAPx/SJ+Gklel4r4AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "dp_hist2, dp_bins2 = dp.histogram(ages_adult, epsilon=0.1, range=(17, 100))\n",
    "dp_hist2 = dp_hist2 / dp_hist2.sum()\n",
    "\n",
    "plt.bar(dp_bins2[:-1], dp_hist2, width=(dp_bins2[1] - dp_bins2[0]) * 0.9)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Error for smaller datasets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's repeate the first experiments above with a smaller dataset, this time the [Cleveland heart disease dataset](https://archive.ics.uci.edu/ml/datasets/heart+Disease) from the UCI Repository. This dataset has 303 samples, a small fractin of the Adult dataset processed previously."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "ages_heart = np.loadtxt(\"https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data\",\n",
    "                        usecols=0, delimiter=\",\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We first find the histogram distribution using `numpy.histogram`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "heart_hist, heart_bins = np.histogram(ages_heart)\n",
    "heart_hist = heart_hist / heart_hist.sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And then find the histogram distribution using `diffprivlib.histogram`, using the defaults as before (with the accompanying warning)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      ".../site-packages/diffprivlib/tools/histograms.py:131: PrivacyLeakWarning: Range parameter has not been specified. Falling back to taking range from the data.\n",
      "To ensure differential privacy, and no additional privacy leakage, the range must be specified independently of the data (i.e., using domain knowledge).\n",
      "  \"specified independently of the data (i.e., using domain knowledge).\", PrivacyLeakWarning)\n"
     ]
    }
   ],
   "source": [
    "dp_heart_hist, dp_heart_bins = dp.histogram(ages_heart)\n",
    "dp_heart_hist = dp_heart_hist / dp_heart_hist.sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And double-check that the bins are the same."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.all(heart_bins == dp_heart_bins)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We then see that the error this time is 3%, a 100-fold increase in error."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total histogram error: 0.031932\n"
     ]
    }
   ],
   "source": [
    "print(\"Total histogram error: %f\" % np.abs(heart_hist - dp_heart_hist).sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mirroring Numpy's behaviour"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can evaluate `diffprivlib.models.histogram` without any privacy by setting `epsilon = float(\"inf\")`. This should give the exact same result as running `numpy.histogram`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      ".../site-packages/diffprivlib/tools/histograms.py:131: PrivacyLeakWarning: Range parameter has not been specified. Falling back to taking range from the data.\n",
      "To ensure differential privacy, and no additional privacy leakage, the range must be specified independently of the data (i.e., using domain knowledge).\n",
      "  \"specified independently of the data (i.e., using domain knowledge).\", PrivacyLeakWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "heart_hist, _ = np.histogram(ages_heart)\n",
    "dp_heart_hist, _ = dp.histogram(ages_heart, epsilon=float(\"inf\"))\n",
    "\n",
    "np.all(heart_hist == dp_heart_hist)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
